/*** Trends - Empirical Application

	---- DESCRIPTIVE EVIDENCE ----

This code produces descriptive evidence/graphs/tables

1. table sample sizes and reform shares (Table A.2)
2. plot mean and variance of years of schooling over cohorts and generations  (Figure A.5)
3. compute and plot intergenerational coefficients together with reform expansion (Figure 3)

**************************************************************************************/

* STATA settings:
clear

* Log file

capture log close
log using ${path2}Logs/${ver}/descriptive.log, text replace

* Load data and select sample
use ${path2}Dat/main_file.dta, replace 

keep if urval==1 | ((f_urval==1 | m_urval==1) & foddar>1967)

* censored education variable for offspring
gen educyrs16=educyrs
replace educyrs16=16 if educyrs>16 & educyrs!=.

** 1. Tabulate sample sizes and reform shares 
	
	preserve
	keep if foddar>=1943 & foddar<=1972 
	
	global x1_income "f_l_lifeinc5359"
	global x2_income "f_l_lifeinc3545"
	global y_income "l_lifeinc3035"
	global x_educ_70 "f_educyrs70"
	global x_educ_90 "f_educyrs90"
	global y_educ "educyrs"
	

	gen t_sample_1 = (sample_2==1 & experiment!=.) if foddar<1960	
	replace t_sample_1 = (sample_2==1 & f_experiment!=.) if foddar>1959
	gen t_sample_2 = (sample_2==1 & ${x_educ_70}!=. & ${y_educ}!=. & experiment!=.) if foddar<1960
	replace t_sample_2 = (sample_2==1 & (${x_educ_70}!=. | ${x_educ_90}!=.) & ${y_educ}!=. & f_experiment!=.) if foddar>1959
	
	gen t_sample_3 = (sample_2==1 & ${x1_income}!=. & ${y_income}!=. & ${x1_income}>log(10000) & ${y_income}>log(10000) & experiment!=.) if foddar<1960
	replace t_sample_3 = (sample_2==1 & ${x2_income}!=. & ${y_income}!=. & ${x2_income}>log(10000) & ${y_income}>log(10000) & f_experiment!=.) if foddar>1959
	replace t_sample_3=0 if 1970-f_byear>55	
	
	gen sam1_exp=experiment if t_sample_1==1 
	gen sam1_exp_f=f_experiment if t_sample_1==1 
	gen sam2_exp=experiment if t_sample_2==1 
	gen sam2_exp_f=f_experiment if t_sample_2==1 
	gen sam3_exp=experiment if t_sample_3==1 
	gen sam3_exp_f=f_experiment if t_sample_3==1 
	
	collapse (count) all=lopnr (mean) exp=experiment exp_f=f_experiment (sum) sam1_sum=t_sample_1 (mean) sam1_s=t_sample_1 sam1_exp sam1_exp_f ///
	 (sum) sam2_sum=t_sample_2 (mean) sam2_s=t_sample_2 sam2_exp sam2_exp_f (sum) sam3_sum=t_sample_3 (mean) sam3_s=t_sample_3 sam3_exp sam3_exp_f, by(foddar)
	 
	label var all "# obs."
	label var exp "share reform school, offspring"
	label var exp_f "share reform school, fathers"
	
	label var sam1_sum "# obs. intergen. sample"
	label var sam1_s   "share of total obs."
	label var sam1_exp "share reform, offspring"
	label var sam1_exp_f "share reform, father"

	label var sam2_sum "# obs. ig. educ. sample"
	label var sam2_s   "share of total obs."
	label var sam2_exp "share reform, offspring"
	label var sam2_exp_f "share reform, father"
	
	label var sam3_sum "# obs. ig. inc. sample"
	label var sam3_s   "share of total obs."
	label var sam3_exp "share reform, offspring"
	label var sam3_exp_f "share reform, father"
	
		
	estpost tabstat all exp exp_f sam1_* sam2_* sam3_*, by(foddar) stats(mean) columns(variables) nototal
	esttab . using ${path2}Logs/${ver}/table_sample_sizes_allcohorts.tex , nogaps label cells("all(fmt(%9.0fc)) exp(fmt(2)) exp_f(fmt(2)) sam1_sum(fmt(%9.0fc)) sam1_exp(fmt(2)) sam1_exp_f(fmt(2)) sam2_sum(fmt(%9.0fc)) sam2_exp(fmt(2)) sam2_exp_f(fmt(2)) sam3_sum(fmt(%9.0fc)) sam3_exp(fmt(2)) sam3_exp_f(fmt(2))") tex replace page 
	esttab . using ${path2}Logs/${ver}/table_sample_sizes_allcohorts.xls , nogaps label cells("all(fmt(%9.0fc)) exp(fmt(2)) exp_f(fmt(2)) sam1_sum(fmt(%9.0fc)) sam1_exp(fmt(2)) sam1_exp_f(fmt(2)) sam2_sum(fmt(%9.0fc)) sam2_exp(fmt(2)) sam2_exp_f(fmt(2)) sam3_sum(fmt(%9.0fc)) sam3_exp(fmt(2)) sam3_exp_f(fmt(2))") csv replace page 
	restore

* Compute reform shares for everybody ("sample 0") 
	
	global sampl=0
	local par_ed "f_educyrs"
	local par_byear "f_byear"
	local par_age "f_ageatbirth"
	local par_exp "f_experiment"
     
     	** (0) share of reform offspring and parents
	gen g_reform=.
	gen g_reform_f=.
	gen g_reform_f33=.
	gen g_year=.
	label var g_reform "offspring              "
	label var g_reform_f "fathers                "
	label var g_reform_f33 "fathers (interg.sample)"
	
	local obs=0
	foreach cohort of numlist 1940/1990 {
		di "Cohort: " `cohort'
		local obs=`obs'+1
		
		replace g_year=`cohort' in `obs'
		su experiment if foddar==`cohort'
		replace g_reform=r(mean) in `obs'
		su `par_exp' if foddar==`cohort'
		replace g_reform_f=r(mean) in `obs'
		su `par_exp' if foddar==`cohort' & `par_byear'!=. & `par_age'<33
		replace g_reform_f33=r(mean) in `obs'
	}
	preserve 
	keep g_*
	drop if g_year==.
	save ${path2}Dat/reformshares_${sampl}.dta, replace 
	restore
	drop g_*

 	* Settings to use for remainder of code
	
	global sampl=2
	keep if sample_2 == 1

	local par_ed "f_educyrs"
	local par_byear "f_byear"
	local par_age "f_ageatbirth"
	local par_exp "f_experiment"
	
** 2. Mean and variance of years of schooling over cohorts and generations 
     
	** 1990+ educyrs variable (1920-1976)
 
	global minyear = 1920
	global maxyear = 1976			

	* all/offspring				
	foreach cohort of numlist $minyear/$maxyear {
		di "All, Cohort: " `cohort'
		su educyrs if foddar==`cohort'
		scalar all_mean_`cohort' = r(mean)
		scalar all_sd_lb_`cohort' = r(mean) - r(sd)
		scalar all_sd_ub_`cohort' = r(mean) + r(sd)
		scalar all_var_`cohort' = r(Var)
	}
	
	** 1970 educyrs70 variable (1911-1945)

	* matched parents over parent cohorts - educyrs70
	foreach cohort of numlist 1911/1945 {
		di "Cohort: " `cohort'
		su `par_ed'70 if `par_byear'==`cohort'
		scalar parents70_mean_`cohort'  = r(mean)
		scalar parents70_sd_lb_`cohort' = r(mean) - r(sd)
		scalar parents70_sd_ub_`cohort' = r(mean) + r(sd)	
		scalar parents70_var_`cohort' = r(Var)		
	} 
	
	* Plot means and variance of parental and offspring years of schooling over cohorts
	foreach group in all parents70 {
		gen g_`group'_year=.
		gen g_`group'_mean=.
		gen g_`group'_sd_lb=.
		gen g_`group'_sd_ub=.
		gen g_`group'_var=.
		foreach y of numlist 1911/1990 {
			local obs= `y'-1910
			replace g_`group'_year=`y' in `obs'	
			cap replace g_`group'_mean=`group'_mean_`y' in `obs'
			cap replace g_`group'_sd_lb=`group'_sd_lb_`y' in `obs'
			cap replace g_`group'_sd_ub=`group'_sd_ub_`y' in `obs'	
			cap replace g_`group'_var=`group'_var_`y' in `obs'	
		}
		label var g_`group'_year "Cohort"
		label var g_`group'_mean "Mean (`group')" 
		label var g_`group'_sd_lb ""
		label var g_`group'_sd_ub  ""
		label var g_`group'_var "Variance (`group')"
	}	
			
	* parents educyrs70 (1911-1940) and all/offspring educyrs90 (1933-1972)
	twoway ///
	(line g_parents70_mean g_parents70_year if g_parents70_year>=1911 & g_parents70_year<=1935 , color(black) msize(small)) /// 
	(line g_all_mean g_all_year if g_all_year>=1933 & g_all_year<=1972 , color(black) msize(small))  /// 
	, scheme(s2mono) graphregion(color(white)) xtitle("cohort")  xline(1943 1955 , lcolor(red) style(unextended)) xmtick(##5) xlabel(1910(5)1970) ylabel(8(1)13.5) /// 
	legend(region(lcolor(white))) legend(label(1 "Cohorts fathers") label(2 "Cohorts offspring") size(small) symxsize(8) symysize(0.5)) xsize(5.5) ysize(2.3) name(mean,replace) title("(A) Mean:") scale(0.8)

	twoway ///
	(line g_parents70_var g_parents70_year if g_parents70_year>=1911 & g_parents70_year<=1935  , color(black) msize(small)) /// 
	(line g_all_var g_all_year if g_all_year>=1933 & g_all_year<=1972 , color(black)  msize(small))  /// 
	, scheme(s2mono) graphregion(color(white)) xtitle("cohort")  xline(1943 1955 , lcolor(red) style(unextended)) xmtick(##5) xlabel(1910(5)1970) ylabel(4(1)9.5) ///
	legend(region(lwidth(none) lcolor(white))) legend(off) xsize(5.5) ysize(2.3) name(variance,replace) title("(B) Variance:") scale(0.8)

	grc1leg mean variance , rows(2) graphregion(color(white)) legendfrom(mean) 
	qui graph save	 ${path2}Logs/${ver}/Trend_EducYrs_1911_1972_Mean_and_Var_Parent70All90_Smp${sampl}.gph , replace
	qui graph export ${path2}Logs/${ver}/Trend_EducYrs_1911_1972_Mean_and_Var_Parent70All90_Smp${sampl}.eps , replace
	
	drop g_*
	cap drop est_*

** 3. Compute and plot mobility trends over cohorts

	est clear

	* Regressions using f_educyrs70/p_educyrs70 for parent generation
	forvalues y=1940/1967 {	

		local par_ed "f_educyrs"
		local par_byear "f_byear"
		local par_age "f_ageatbirth"
		local par_exp "f_experiment"


		local Coef70_title = "Intergenerational Education Coefficient, Sons"
		reg educyrs `par_ed'70			if gender==0 & foddar==`y' , ro
		est store Coef70_`y'	


		* restrict to parents younger than 30 to plot trends before 1943
		local Coefb3070_title = "Intergenerational Education Coefficient, Sons and Daughters, subsample with `par_age'<30"
		qui reg educyrs `par_ed'70 		if foddar==`y' & `par_age'<30 , ro
		est store Coefb3070_`y'
	}					
	drop _est* 


	* Saving coefficients of regressions based on 1970 education variable into g_* variables
	gen int g_year=. in 1/100
	local coeftype "Coefb30" 
		gen g_coef70_`coeftype'=.	in 1/100
		gen g_ci_lb70_`coeftype'=.	in 1/100
		gen g_ci_ub70_`coeftype'=.	in 1/100
		forvalues y=1940(1)1967 {		
			local obs= `y'-1939
			cap replace g_year=`y' in `obs'	
			est restore `coeftype'70_`y'
			scalar coef_`y' = _b[`par_ed'70]
			scalar bound = _se[`par_ed'70]*invttail(e(df_r),0.5*(1-c(level)/100))
			scalar ci_lb_`y' = _b[`par_ed'70] - bound
			scalar ci_ub_`y' = _b[`par_ed'70] + bound
			cap replace g_coef70_`coeftype'=coef_`y' in `obs'
			cap replace g_ci_lb70_`coeftype'=ci_lb_`y' in `obs'
			cap replace g_ci_ub70_`coeftype'=ci_ub_`y' in `obs'	
		}	
	est clear


	* Regressions using combined 1970/90+ education variable for parent generation
	forvalues y=1943/1973 {
	
		* using combined 1970/90+ education variables for parents
		local  Coefmix7090_title = "Intergenerational Education Coefficient, Sons, Combined f_educyrs70 and f_educyrs90"
		qui reg educyrs `par_ed' if foddar==`y' 
		est store Coefmix7090_`y'
	}

	* Saving coefficients of regressions based on combined 1970/90+ education variable into g_* variables	
		local coeftype "Coefmix7090"
		gen g_coef_`coeftype'=.		in 1/100
		gen g_ci_lb_`coeftype'=.	in 1/100
		gen g_ci_ub_`coeftype'=.	in 1/100
		forvalues y=1943(1)1973 {		
			local obs= `y'-1939
			cap replace g_year=`y' in `obs'	
			est restore `coeftype'_`y'
			scalar coef_`y' = _b[`par_ed']
			scalar bound = _se[`par_ed']*invttail(e(df_r),0.5*(1-c(level)/100))
			scalar ci_lb_`y' = _b[`par_ed'] - bound
			scalar ci_ub_`y' = _b[`par_ed'] + bound
			cap replace g_coef_`coeftype'=coef_`y' in `obs'
			cap replace g_ci_lb_`coeftype'=ci_lb_`y' in `obs'
			cap replace g_ci_ub_`coeftype'=ci_ub_`y' in `obs'	
		}

		
	* Store coefficient estimates
	keep g_*
	drop if g_year==.
	save ${path2}Dat/educationalcoefficient${sampl}.dta, replace

	* Plot combined graph with reform shares and intergen. coefficient

	use "${path2}Dat/reformshares_0.dta", replace
	drop if g_year==.

	merge 1:m g_year using "${path2}Dat/educationalcoefficient2.dta", nogen 


	twoway (area g_reform g_reform_f g_year , yaxis(2) color(gs10 black) lwidth(none none)) ///
		(connected g_coef70_Coefb30 g_year if g_year>=1940 & g_year<=1945 , yaxis(1) color(black) lpattern(dash) msymbol(R) msize(small)) ///
		(connected g_coef_Coefmix7090 g_year if g_year>=1943 & g_year<=1972 , yaxis(1) yscale(alt) yscale(alt axis(2)) color(black) msymbol(S) msize(small))   ,   ///  
		scheme(s2mono) graphregion(color(white)) xtitle("cohort") xmtick(##5) yscale(titlegap(*+7)) ytitle("intergenerational coefficient") ytitle("reform share", axis(2)) xlabel(1940(5)1990) legend(region(lcolor(white)) ///
		cols(3) order(4 1 2 ) label(1 "reform share, offspring") label(2 "reform share, fathers") label(4 "intergen. coeff.")) ///
		xsize(5.6) ysize(3.25) 

	qui graph export ${path2}/Logs/${ver}/Combine_Shares_IGC.eps , replace	
	qui graph save	 /${path2}/Logs/${ver}/Combine_Shares_IGC.gph , replace

cap log close
